***********************************************************************************************************
* This replication file estimates demand expansion due to entry, as well as fixed cost and lambda bounds. *
***********************************************************************************************************
*1. Predict output levels based on the demand model and load estimated market expansion measures.
*2. Calculate fixed cost bounds based on presumed values of lambda (0 and 1).
*3. Calculate lambda bounds from switches.
*4. Calculate lambda bounds based on average and district-level fixed costs.

***********************
* 0 Set path to files *
***********************
clear all
capture log close 
set more off
set trace off
mata: mata set matastrict off
cd $main_directory

****************************************************************************************************
* 1. Predict output levels based on the demand model and load estimated market expansion measures. * 
****************************************************************************************************

use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"

//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
//Set predetermined values 
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

gen association=(NotPerOff>1)
label define astatus 0 "Single notary" 1 "Association"
label value association astatus

//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

//Calculate the total demand on the market
qui su Q1 if common==1 
sca sumQ1=r(sum)
sca di sumQ1

//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma_Q1=scale*sumQ1/sumPopulation
sca di gamma_Q1
//Calculate the market size (i.e. number of possible transactions)
gen market_size_Q1=gamma_Q1*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand_Q1=Q1 
drop if firm_demand_Q1==.
drop if firm_demand_Q1<=0

// Controls: 
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total_Q1 firm_ID firm_demand_Q1 market_ID_num market_size_Q1 $controls_utility

*************************
* STATUS QUO PREDICTION *
*************************
//We begin with predictions of the demand and consumer surplus, since they require the full dataset (all market-notary combinations).
//Demand prediction Q1_IV
sort market_ID
mata:
 X    = st_data(., "$controls_total_Q1") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
	Results=spdemand_log(X,betas_Q1)
	n_firms=rows(Results)
    st_numscalar("n_firms",n_firms)
	firm_ID    = st_data(., "firm_ID") 
	Prediction=J(nx,1,.)
	for(i=1; i<=nx; i++) { //i goes over the market-notary combinations
    for(j=1; j<=n_firms; j++) {
	if (firm_ID[i]==Results[j,1]) {
		Prediction[i]=Results[j,2]
	}
	}
	}
prediction_exp=exp(Prediction)
st_addvar("double","Q1_IV")
st_store(.,"Q1_IV",prediction_exp)
end


//Consumer surplus prediction CS_total_N
sort market_ID
mata:
 X    = st_data(., "$controls_total_Q1") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
   CS_matrix=consumer_surplus(X,betas_Q1,transportation_cost)
   CS_total_Q1=sum(CS_matrix[,2])
   st_numscalar("CS_total_Q1_current", CS_total_Q1)
end
di CS_total_Q1_current
gen CS_total_Q1_current=lambda*CS_total_Q1_current

//Demand prediction Q2_IV

//Calculate the total demand on the market
qui su Q2 if common==1
sca sumQ2=r(sum)
sca di sumQ2

//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma_Q2=scale*sumQ2/sumPopulation
sca di gamma_Q2
//Calculate the market size (i.e. number of possible transactions)
gen market_size_Q2=gamma_Q2*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand_Q2=Q2 
drop if firm_demand_Q2==.
drop if firm_demand_Q2<=0

//Define the values inside the X matrix.
global controls_total_Q2 firm_ID firm_demand_Q2 market_ID_num market_size_Q2 $controls_utility

sort market_ID
mata:
 X    = st_data(., "$controls_total_Q2") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
	Results=spdemand_log(X,betas_Q2)
	n_firms=rows(Results)
    st_numscalar("n_firms",n_firms)
	firm_ID    = st_data(., "firm_ID") 
	Prediction=J(nx,1,.)
	for(i=1; i<=nx; i++) { //i goes over the market-notary combinations
    for(j=1; j<=n_firms; j++) {
	if (firm_ID[i]==Results[j,1]) {
		Prediction[i]=Results[j,2]
	}
	}
	}
prediction_exp=exp(Prediction)
st_addvar("double","Q2_IV")
st_store(.,"Q2_IV",prediction_exp)
end


//Consumer surplus prediction CS_total_N
sort market_ID
mata:
 X    = st_data(., "$controls_total_Q2") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
   CS_matrix=consumer_surplus(X,betas_Q2,transportation_cost)
   CS_total_Q2=sum(CS_matrix[,2])
   st_numscalar("CS_total_Q2_current", CS_total_Q2)
end
gen CS_total_Q2_current=lambda*CS_total_Q2_current
gen CS_total_current=CS_total_Q2_current+CS_total_Q1_current

//We then calculate costs and profits (this requires only 1 observation per notary).
keep if common==1
sort firm_ID
gen L2_Q=exp(-aL)*(alpha*Q1_IV+(1-alpha)*Q2_IV)^(1/nu)
gen M2_Q=exp(-aM/phi)*(alpha*Q1_IV+(1-alpha)*Q2_IV)^(1/(nu*phi))
su L2_Q
gen L2_industry_current=r(sum)

su M2_Q
gen M2_industry_current=r(sum)

*variable cost and marginal cost functions
gen VC_Q=L2_Q+M2_Q
su VC_Q
gen VC_industry_current=r(sum)

*Output levels
su Q1_IV
gen Q1_industry_current=r(sum)
su Q2_IV
gen Q2_industry_current=r(sum)
gen Q_IV=Q1_IV+Q2_IV
su Q_IV
gen Q_industry_current=r(sum)
gen sales_per_notary=Q_IV/NotPerOff
su sales_per_notary
gen Q_average_predicted_per_notary=r(mean)
gen sales_per_notary_Q1=Q1_IV/NotPerOff
gen sales_per_notary_Q2=Q2_IV/NotPerOff
su sales_per_notary_Q1
gen Q1_average_predicted_per_notary=r(mean)
su sales_per_notary_Q2
gen Q2_average_predicted_per_notary=r(mean)
su NotPerOff
sca notaries=r(sum) //Calculate the total number of notaries

*Variable profits at the status quo
gen v_profit_individual_n=p_Q1*Q1_IV+p_Q2*Q2_IV-VC_Q
su v_profit_individual_n 
gen v_profit_per_notary=v_profit_individual_n/NotPerOff
su v_profit_per_notary

*Industry profitability
gen revenue_individual_current=p_Q1*Q1_IV+p_Q2*Q2_IV
su revenue_individual_current
gen revenue_industry_current=r(sum)
gen v_profit_industry_current=p_Q1*Q1_industry_current+p_Q2*Q2_industry_current-VC_industry_current
gen profit_industry_current=v_profit_industry_current-fixed_cost*notaries
*This is the welfare without fixed costs
gen welfare_no_f_current=v_profit_industry_current+CS_total_current //Note that here already we have multiplied by lambda!
sum Q_industry_current v_profit_industry_current CS_total_current profit_industry_current welfare_no_f_current

*******************************************************************************
*2. Calculate fixed cost bounds based on presumed values of lambda (0 and 1). *
*******************************************************************************

//Counterfactual output (calculated in the previous 2 stata files)
mata: mata matuse "Data_Belgium\Generated_data\sales_Q1_add", replace
mata: mata matuse "Data_Belgium\Generated_data\sales_Q2_add", replace
mata: mata matuse "Data_Belgium\Generated_data\sales_Q1_remove", replace
mata: mata matuse "Data_Belgium\Generated_data\sales_Q2_remove", replace

//Calculate market expansion and business stealing

//Collect the information on changes in output due to the removal of a notary from each office.
mata:
Q1_own_remove=diagonal(sales_Q1_remove) //The diagonal elements of sales_Q1_remove represent the sales of an office when 1 notary is removed from it.
Q2_own_remove=diagonal(sales_Q2_remove)
Q_own_remove=Q1_own_remove+Q2_own_remove
Q1_industry_remove=rowsum(sales_Q1_remove) //The row-sum of sales_Q1_remove represents the sales of all offices when 1 notary is removed from a given office.
Q2_industry_remove=rowsum(sales_Q2_remove)
sum(Q1_own_remove:==0)
Q_industry_remove=rowsum(sales_Q1_remove)+rowsum(sales_Q2_remove)
st_addvar("double","Q_own_remove")
st_store(.,"Q_own_remove",Q_own_remove)
st_addvar("double","Q1_own_remove")
st_store(.,"Q1_own_remove",Q1_own_remove)
st_addvar("double","Q2_own_remove")
st_store(.,"Q2_own_remove",Q2_own_remove)
st_addvar("double","Q_industry_remove")
st_store(.,"Q_industry_remove",Q_industry_remove)
st_addvar("double","Q1_industry_remove")
st_store(.,"Q1_industry_remove",Q1_industry_remove)
st_addvar("double","Q2_industry_remove")
st_store(.,"Q2_industry_remove",Q2_industry_remove)
end
replace Q1_own_remove=0 if NotPerOff==1 //The values of output are missing if there is only one notary in the office, since if it is removed, then the notary office is no longer open. Therefore sales are expected to be 0.
replace Q2_own_remove=0 if NotPerOff==1


//Collect the information on changes in output due to the addition of a notary to each office.
mata:
Q1_own_add=diagonal(sales_Q1_add)
Q2_own_add=diagonal(sales_Q2_add)
Q_own_add=Q1_own_add+Q2_own_add
Q_industry_add=rowsum(sales_Q1_add)+rowsum(sales_Q2_add)
st_addvar("double","Q_own_add")
st_store(.,"Q_own_add",Q_own_add)
st_addvar("double","Q1_own_add")
st_store(.,"Q1_own_add",Q1_own_add)
st_addvar("double","Q2_own_add")
st_store(.,"Q2_own_add",Q2_own_add)
st_addvar("double","Q_industry_add")
st_store(.,"Q_industry_add",Q_industry_add)
end
//Individual variable profit at the status quo
su v_profit_individual_n
//Individual variable profit after removal of a notary
gen v_profit_individual_n_1=p_Q1*Q1_own_remove+p_Q2*Q2_own_remove-(exp(-aL)*(alpha*Q1_own_remove+(1-alpha)*Q2_own_remove)^(1/nu)+exp(-aM/phi)*(alpha*Q1_own_remove+(1-alpha)*Q2_own_remove)^(1/(nu*phi)))
replace v_profit_individual_n_1=0 if NotPerOff==1
//Calculate the change in variable profits due to the entry of the marginal notary in an office.
gen change_v_profit_individual_n=v_profit_individual_n-v_profit_individual_n_1
su change_v_profit_individual_n
//Individual variable profit after addition of a notary
gen v_profit_individual_n1=p_Q1*Q1_own_add+p_Q2*Q2_own_add-(exp(-aL)*(alpha*Q1_own_add+(1-alpha)*Q2_own_add)^(1/nu)+exp(-aM/phi)*(alpha*Q1_own_add+(1-alpha)*Q2_own_add)^(1/(nu*phi)))
gen change_v_profit_individual_n1=v_profit_individual_n1-v_profit_individual_n
su change_v_profit_individual_n1

//Total output
replace Q_own_remove=0 if Q_own_remove==.
//Calculate the additional sales due to the entry of the marginal notary
gen change_Q_own=Q_IV-Q_own_remove
su change_Q_own if association==0
gen change_single_notary=r(mean)
su change_Q_own if association==1
gen change_association=r(mean)

//Generate the change in sales for the entire industry
gen change_Q_total=Q_industry_current-Q_industry_remove
gen change_Q_perc=change_Q_total/Q_average_predicted_per_notary
//Calculate the sales by other notary offices
gen Q_others_current=Q_industry_current-Q_IV //Status quo
gen Q_others_remove=Q_industry_remove-Q_own_remove //Counterfactual
//Calculate the change in the sales of other firms due to entry in the specific notary office (i.e. business stealing)
gen change_Q_others=Q_others_current-Q_others_remove
//Calculate this as a percentage of average notary sales
gen diversion_ratio=change_Q_others/change_single_notary if association==0
replace diversion_ratio=change_Q_others/change_association if association==1
//Table 5
bysort association: su change_Q_own change_Q_total change_Q_perc change_Q_others diversion_ratio

//Calculate the variable profits for alternative levels of sales. Here it is very important to sort by the firm ID to ensure correct merging of the mata data.
sort firm_ID
mata:
//Calculate labor costs for the industry in the case of addition or removal
LC_industry_add=rowsum(J(1152,1152,exp(-aL)):*(alpha:*sales_Q1_add:+(1-alpha):*sales_Q2_add):^(1/nu))
LC_industry_remove=rowsum(J(1152,1152,exp(-aL)):*(alpha:*sales_Q1_remove:+(1-alpha):*sales_Q2_remove):^(1/nu))
//Calculate intermediate input costs for the industry in the case of addition or removal
IntC_industry_add=rowsum(exp(J(1152,1152,(-aM/phi))):*(alpha:*sales_Q1_add:+(1-alpha):*sales_Q2_add):^(1/(nu*phi)))
IntC_industry_remove=rowsum(exp(J(1152,1152,(-aM/phi))):*(alpha:*sales_Q1_remove:+(1-alpha):*sales_Q2_remove):^(1/(nu*phi)))
//Calculate total industry costs in the case of addition or removal of notaries
VC_industry_add=LC_industry_add+IntC_industry_add
VC_industry_remove=LC_industry_remove+IntC_industry_remove
st_addvar("double","VC_industry_add")
st_store(.,"VC_industry_add",VC_industry_add)
st_addvar("double","VC_industry_remove")
st_store(.,"VC_industry_remove",VC_industry_remove)
//Revenue and variable profit for one product
Q1_industry_add=rowsum(sales_Q1_add)
Q1_industry_remove=rowsum(sales_Q1_remove)
Q2_industry_add=rowsum(sales_Q2_add)
Q2_industry_remove=rowsum(sales_Q2_remove)
R_industry_add=p_Q1*Q1_industry_add+p_Q2*Q2_industry_add
R_industry_remove=p_Q1*Q1_industry_remove+p_Q2*Q2_industry_remove
v_profit_industry_add=R_industry_add-VC_industry_add
v_profit_industry_remove=R_industry_remove-VC_industry_remove
st_addvar("double","v_profit_industry_add")
st_store(.,"v_profit_industry_add",v_profit_industry_add)
st_addvar("double","v_profit_industry_remove")
st_store(.,"v_profit_industry_remove",v_profit_industry_remove)
end

//Load the counterfactual surplus which was calculated in the previous file
mata: mata matuse "Data_Belgium\Generated_data\CS_total_Q1_remove", replace
mata: mata matuse "Data_Belgium\Generated_data\CS_total_Q1_add", replace
mata: mata matuse "Data_Belgium\Generated_data\CS_total_Q2_remove", replace
mata: mata matuse "Data_Belgium\Generated_data\CS_total_Q2_add", replace
mata: CS_total_add=CS_total_Q1_add+CS_total_Q2_add
mata: CS_total_remove=CS_total_Q1_remove+CS_total_Q2_remove

sort firm_ID
mata:
st_addvar("double","CS_total_add")
st_store(.,"CS_total_add",CS_total_add)
st_addvar("double","CS_total_remove")
st_store(.,"CS_total_remove",CS_total_remove)
end
//Calculate average variable profits and consumer surplus from adding and removing a notary (these generate the lower and upper bound of the fixed costs)
gen V_add=v_profit_industry_add-v_profit_industry_current
su V_add
sca meanV_add=r(mean)
gen C_add=CS_total_add-CS_total_current
su C_add
sca meanC_add=r(mean)
gen V_remove=v_profit_industry_current-v_profit_industry_remove
su V_remove  
sca meanV_remove=r(mean)
gen C_remove=CS_total_current-CS_total_remove
su C_remove
sca meanC_remove=r(mean)

************************************
* FIXED COST BOUNDS WITH BOOTSTRAP *
************************************
//Calculate fixed cost bounds
gen f_lower_add_welfare=lambda*meanC_add+meanV_add
su f_lower_add_welfare
sca f_lower_add_welfare=r(mean)
gen f_upper_remove_welfare=lambda*meanC_remove+meanV_remove
su f_upper_remove_welfare 
sca f_upper_remove_welfare=r(mean)
gen f_lower_add_industry=meanV_add
su f_lower_add_industry
sca f_lower_add_industry=r(mean)
gen f_upper_remove_industry=meanV_remove
su f_upper_remove_industry 
sca f_upper_remove_industry=r(mean)

//Calculate 95% confidence intervals
gen b_id=_n //Make a bootstrap identifier
qui su b_id
sca nobs=r(max) //This counts how many observations should be in each sample. We use the number of observations in the current dataset as the chosen sample size.
sca B=1000 //Select the number of bootstrap draws - we are taking a 1000 samples with the number of observations in each sample being equal to the number of observations in the dataset. 
set matsize 5000 //This just allows us to store large matrices in stata
set seed 1234
matrix b_f_remove_welfare = J(B,1,.) //This is a column vector in which we will store the mean f remove estimates, which will serve to identify the upper bound of the fixed costs under welfare maximization.
matrix b_f_add_welfare = J(B,1,.) //This is a column vector in which we will store the mean f add estimates, which will serve to identify the lower bound of the fixed costs under welfare maximization.
matrix b_f_remove_industry = J(B,1,.) //This is a column vector in which we will store the mean f remove estimates, which will serve to identify the upper bound of the fixed costs under joint profit maximization.
matrix b_f_add_industry = J(B,1,.) //This is a column vector in which we will store the mean f add estimates, which will serve to identify the lower bound of the fixed costs under joint profit maximization.

forvalues b= 1/1000 {
//Take random draws of notary office ids - this will be the sample for the bootstrap
if `b'> 1 {
drop randnum
}
gen randnum = runiformint(1,nobs) //We draw a random integer between 1 and the number of observations
//Make empty matrices to store the values of profits and consumer surplus for each observation
matrix V_remove_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of V_remove (variable profits from removal)
matrix C_remove_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of C_remove (consumer surplus from removal)
matrix V_add_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of V_add (variable profits from addition)
matrix C_add_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of C_add (consumer surplus from addition)
//Go over the ids in the sample and store the relevant information
forvalues i= 1/`=nobs' {
  mat V_remove_mat[`i',1]=V_remove[randnum[`i']] //This stores the variable profits from removal from the observation in the original sample which corresponds to the random number that has been drawn.
  mat C_remove_mat[`i',1]=C_remove[randnum[`i']] //A similar approach is taken for the rest of the relevant information
  mat V_add_mat[`i',1]=V_add[randnum[`i']]
  mat C_add_mat[`i',1]=C_add[randnum[`i']]
}
//Calculate the ratio of sums for the draw and store it
mata : st_matrix("mean_V_remove", colsum(st_matrix("V_remove_mat")) :/ colnonmissing(st_matrix("V_remove_mat"))) //The sum of the variable profits is divided by the number of non-missing cells in the variable profit vector, i.e. the number of observations.
mata : st_matrix("mean_C_remove", colsum(st_matrix("C_remove_mat")) :/ colnonmissing(st_matrix("C_remove_mat")))
mat b_f_remove_welfare[`b',1]=lambda*mean_C_remove[1,1]+mean_V_remove[1,1]
mat b_f_remove_industry[`b',1]=mean_V_remove[1,1]

mata : st_matrix("mean_V_add", colsum(st_matrix("V_add_mat")) :/ colnonmissing(st_matrix("V_add_mat")))
mata : st_matrix("mean_C_add", colsum(st_matrix("C_add_mat")) :/ colnonmissing(st_matrix("C_add_mat")))
mat b_f_add_welfare[`b',1]=lambda*mean_C_add[1,1]+mean_V_add[1,1]
mat b_f_add_industry[`b',1]=mean_V_add[1,1]
}
drop b_id randnum

//Make a matrix with the upper and lower bounds of lambda
mat f_bootstrap = b_f_remove_welfare,b_f_add_welfare,b_f_remove_industry,b_f_add_industry
matrix colnames f_bootstrap = b_f_remove_welfare b_f_add_welfare b_f_remove_industry b_f_add_industry
//Export the matrix into a new dataset of bootstrap iteration means
preserve
clear
svmat f_bootstrap, names(col)
//Calculate confidence intervals for fixed cost bounds if lambda=1
sca f_upper_remove_welfare=round(f_upper_remove_welfare,0.001)
di f_upper_remove_welfare 
egen p97_5_welfare_pct  = pctile(b_f_remove_welfare), p(97.5)
su p97_5_welfare_pct 
sca p97_5_welfare_unrounded=r(mean)
sca p97_5_welfare=round(p97_5_welfare_unrounded,0.001)
di p97_5_welfare

sca f_lower_add_welfare=round(f_lower_add_welfare,0.001)
di f_lower_add_welfare 
egen p2_5_welfare_pct= pctile(b_f_add_welfare), p(2.5)
su p2_5_welfare_pct 
sca p2_5_welfare_unrounded=r(mean)
sca p2_5_welfare=round(p2_5_welfare_unrounded,0.001)
di p2_5_welfare

//Calculate confidence intervals for fixed cost bounds if lambda=0
di f_upper_remove_industry 
egen p97_5_industry_pct= pctile(b_f_remove_industry), p(97.5)
su p97_5_industry_pct 
sca p97_5_industry_unrounded=r(mean)
sca p97_5_industry=round(p97_5_industry_unrounded,0.001)

di f_lower_add_industry
sca f_lower_add_industry=round(f_lower_add_industry,0.001)
egen p2_5_industry_pct= pctile(b_f_add_industry), p(2.5)
su p2_5_industry_pct 
sca p2_5_industry_unrounded=r(mean)
sca p2_5_industry=round(p2_5_industry_unrounded,0.001)

*Table 8 + Table A2 Rows 1 and 3 (t_d=100)
di f_lower_add_industry " & " f_upper_remove_industry " & " p2_5_industry " &  " p97_5_industry
di f_lower_add_welfare " & " f_upper_remove_welfare " & " p2_5_welfare " &  " p97_5_welfare 
restore

********************************************
*3. Calculate lambda bounds from switches. *
********************************************

//Here we add information on the possible consumer surplus increasing and decreasing switches. We only consider existing locations.
merge 1:1 bvdid_num using "Estimates\Lambda_bounds\lambda_1.dta"
global ids 2 3 4 5 6 8 9 10 11 12
drop _merge

foreach i in $ids {
merge 1:1 bvdid_num using "Estimates\Lambda_bounds\lambda_`=`i''.dta", update
drop _merge
}

//This is the potential consumer surplus increase from moving a notary to a different location.
su consumer_surplus_pos
sca meanC_pos=r(mean)
sca N_obs_pos=r(N)
//This is the cost to producers that the switch would have generated
su producer_surplus_pos 
sca meanV_pos=r(mean)
//This is the upper bound of lambda based on CS-increasing switches
gen lambda_upper_switch=-meanV_pos/meanC_pos
su lambda_upper_switch
sca lambda_upper_switch=r(mean)

sca di lambda_upper_switch

//This is the decrease in consumer surplus if we were to switch to inferior locations
su consumer_surplus_neg 
sca meanC_neg=r(mean)
//This is the cost to producers that the switch would have generated
su producer_surplus_neg  
sca meanV_neg=r(mean)
//This is the lower bound of lambda based on CS-decreasing switches
gen lambda_lower_switch=-meanV_neg/meanC_neg
su lambda_lower_switch
sca lambda_lower_switch=r(mean)
sca di lambda_lower_switch

//Calculate 95% confidence intervals
gen b_id=_n //Make a bootstrap identifier
qui su b_id
sca nobs=r(max)
sca B=1000 //Select the number of bootstrap draws
set matsize 5000 //This just allows us to store large matrices in stata
matrix b_lambda_neg = J(B,1,.) //This is a column vector in which we will store the mean lambda remove estimates
matrix b_lambda_pos = J(B,1,.) //This is a column vector in which we will store the mean lambda add estimates

forvalues b= 1/1000 {
//Take random draws of notary office ids - this will be the sample for the bootstrap
if `b'> 1 {
drop randnum
}
gen randnum = runiformint(1,nobs) 

//Make empty matrices to store the values of profits and consumer surplus for each observation
matrix V_neg_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of V_neg
matrix C_neg_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of C_neg
matrix V_pos_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of V_pos
matrix C_pos_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of C_pos

//Go over the ids in the sample and store the relevant information
forvalues i= 1/`=nobs' {
  mat V_neg_mat[`i',1]=producer_surplus_neg[randnum[`i']]
  mat C_neg_mat[`i',1]=consumer_surplus_neg[randnum[`i']]
  mat V_pos_mat[`i',1]=producer_surplus_pos[randnum[`i']]
  mat C_pos_mat[`i',1]=consumer_surplus_pos[randnum[`i']]
}
//Calculate the ratio of sums for the draw and store it
mata : st_matrix("mean_V_neg", colsum(st_matrix("V_neg_mat")) :/ colnonmissing(st_matrix("V_neg_mat")))
mata : st_matrix("mean_C_neg", colsum(st_matrix("C_neg_mat")) :/ colnonmissing(st_matrix("C_neg_mat")))
mat b_lambda_neg[`b',1]=(0-mean_V_neg[1,1])/mean_C_neg[1,1]
mata : st_matrix("mean_V_pos", colsum(st_matrix("V_pos_mat")) :/ colnonmissing(st_matrix("V_pos_mat")))
mata : st_matrix("mean_C_pos", colsum(st_matrix("C_pos_mat")) :/ colnonmissing(st_matrix("C_pos_mat")))
mat b_lambda_pos[`b',1]=(0-mean_V_pos[1,1])/mean_C_pos[1,1]
//mat b_lambda_pos[`b',1]= (colnonmissing(st_matrix("C_pos_mat")/1000)*(0-mean_V_pos[1,1])/mean_C_pos[1,1]+(1-(colnonmissing(st_matrix("C_pos_mat")/1000))*1
}
drop b_id randnum
//Make a matrix with the upper and lower bounds of lambda
mat lambda_bootstrap = b_lambda_neg,b_lambda_pos
matrix colnames lambda_bootstrap = b_lambda_neg b_lambda_pos
//Export the matrix into a new dataset of bootstrap iteration means
preserve
clear
svmat lambda_bootstrap, names(col)
di lambda_lower_switch
egen p2_5_switch= pctile(b_lambda_neg), p(2.5)
su p2_5_switch 
sca p2_5_switch_unrounded=r(mean)
sca p2_5_switch=round(p2_5_switch_unrounded,0.001)
di lambda_upper_switch
su b_lambda_pos, de
egen p97_5_switch= pctile(b_lambda_pos), p(97.5)
su p97_5_switch 
sca p97_5_switch_unrounded=r(mean)
sca p97_5_switch_switch=round(p97_5_switch_unrounded,0.001)
di lambda_lower_switch " & " lambda_upper_switch " & " p2_5_switch " &  " p97_5_switch
restore

******************************************************************************
*4. Calculate lambda bounds based on average and district-level fixed costs. *
******************************************************************************
//Calculate mean fixed costs across all locations
gen F = (EBITDA-EBIT)/NotPerOff
replace F=. if F<=0
sum F
sca meanF=r(mean)
sca meanF=round(meanF)
sca di meanF
mata: meanF=st_numscalar("meanF")

//Calculate the upper and lower bounds
gen lambda_upper_add=(meanF-meanV_add)/meanC_add
su lambda_upper_add
sca lambda_upper_add=r(mean)

gen lambda_lower_remove=(meanF-meanV_remove)/meanC_remove
su lambda_lower_remove
sca lambda_lower_remove=r(mean)

//Calculate the 95% confidence interval
gen b_id=_n //Make a bootstrap identifier
qui su b_id
sca nobs=r(max)
sca B=1000 //Select the number of bootstrap draws
set matsize 5000 //This just allows us to store large matrices in stata
matrix b_lambda_remove = J(B,1,.) //This is a column vector in which we will store the mean lambda remove estimates
matrix b_lambda_add = J(B,1,.) //This is a column vector in which we will store the mean lambda add estimates

forvalues b= 1/1000 {
//Take random draws of notary office ids - this will be the sample for the bootstrap
if `b'> 1 {
drop randnum
}
gen randnum = runiformint(1,nobs) 

//Make empty matrices to store the values of profits and consumer surplus for each observation
matrix V_remove_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of V_remove
matrix C_remove_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of C_remove
matrix V_add_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of V_add
matrix C_add_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of C_add


//Go over the ids in the sample and store the relevant information
forvalues i= 1/`=nobs' {
  mat V_remove_mat[`i',1]=V_remove[randnum[`i']]
  mat C_remove_mat[`i',1]=C_remove[randnum[`i']]
  mat V_add_mat[`i',1]=V_add[randnum[`i']]
  mat C_add_mat[`i',1]=C_add[randnum[`i']]
}
//Calculate the ratio of sums for the draw and store it
mata : st_matrix("mean_V_remove", colsum(st_matrix("V_remove_mat")) :/ colnonmissing(st_matrix("V_remove_mat")))
mata : st_matrix("mean_C_remove", colsum(st_matrix("C_remove_mat")) :/ colnonmissing(st_matrix("C_remove_mat")))
mat b_lambda_remove[`b',1]=(meanF-mean_V_remove[1,1])/mean_C_remove[1,1]
mata : st_matrix("mean_V_add", colsum(st_matrix("V_add_mat")) :/ colnonmissing(st_matrix("V_add_mat")))
mata : st_matrix("mean_C_add", colsum(st_matrix("C_add_mat")) :/ colnonmissing(st_matrix("C_add_mat")))
mat b_lambda_add[`b',1]=(meanF-mean_V_add[1,1])/mean_C_add[1,1]
}
drop b_id randnum

//Make a matrix with the upper and lower bounds of lambda
mat lambda_bootstrap_ar = b_lambda_remove,b_lambda_add
matrix colnames lambda_bootstrap_ar = b_lambda_remove b_lambda_add
//Export the matrix into a new dataset of bootstrap iteration means
preserve
clear
svmat lambda_bootstrap_ar, names(col)
di lambda_lower_remove
egen p2_5 = pctile(b_lambda_remove), p(2.5)
su p2_5
sca p2_5=r(mean)
su b_lambda_remove, de
di lambda_upper_add
egen p97_5 = pctile(b_lambda_add), p(97.5)
su p97_5
sca p97_5=r(mean)
su b_lambda_add, de
mata: lambda_upper_add=st_numscalar("lambda_upper_add")
mata: lambda_lower_remove=st_numscalar("lambda_lower_remove")
mata: p2_5=st_numscalar("p2_5")
mata: p97_5=st_numscalar("p97_5")
mata: bound_intervals=(lambda_lower_remove,lambda_upper_add,p2_5,p97_5)
//These are the lambda bounds from fixed costs which are equal across all districts.
*Table 9, row 1
mata: round_bounds=J(1,6,.)
mata: round_bounds[1,1..4]=round(bound_intervals,0.001)
restore
su F
sca F_all_mean=r(mean)
sca F_all_se=r(sd)/sqrt(r(N))
mata: F_all_mean=st_numscalar("F_all_mean")
mata: F_all_se=st_numscalar("F_all_se")
mata: round_bounds[1,5]=F_all_mean
mata: round_bounds[1,6]=F_all_se

//Generate the lambda and fixed cost range graph
sca lambda_upper_f_l=meanV_add+lambda_upper_switch*meanC_add //This is the upper left corner of the feasible parameters. In other words, it shows the lower bound of f, when we use the upper bound of lambda from switches.
sca lambda_upper_f_u=meanV_remove+lambda_upper_switch*meanC_remove //This is the upper right corner of the feasible parameters. In other words, it shows the upper bound of f, when we use the upper bound of lambda from switches.
sca lambda_lower_f_l=meanV_add+lambda_lower_switch*meanC_add //This is the lower left corner of the feasible parameters. In other words, it shows the lower bound of f, when we use the lower bound of lambda from switches.
sca lambda_lower_f_u=meanV_remove+lambda_lower_switch*meanC_remove //This is the lower right corner of the feasible parameters. In other words, it shows the upper bound of f, when we use the lower bound of lambda from switches.
sca lambda_lower_f_external=(84-meanV_remove)/meanC_remove
sca lambda_upper_f_external=(84-meanV_add)/meanC_add
sca di lambda_lower_f_external
sca di lambda_upper_f_external

//Auxiliary value to help with graph creation in LaTeX
sca intercept_lambda_upper_theta=meanV_add-0.6*meanC_add //x-axis intercept lambda_upper(theta)
sca di intercept_lambda_upper_theta
sca intercept_lambda_upper_theta=meanV_add+1.1*meanC_add //upper bound of lambda_upper(theta) on graph
sca di intercept_lambda_upper_theta


sca intercept_lambda_lower_theta=meanV_remove-0.6*meanC_remove //x-axis intercept lambda_lower(theta)
sca di intercept_lambda_lower_theta
sca intercept_lambda_lower_theta=meanV_remove+1.1*meanC_remove //upper bound of lambda_lower(theta) on graph
sca di intercept_lambda_lower_theta

sca di lambda_upper_f_l
sca di lambda_upper_f_u
sca di lambda_lower_f_u
sca di lambda_lower_f_l

sca lower_triangle_lambda=(meanV_add-meanV_remove)/(meanC_remove-meanC_add)
sca lower_triangle_f=meanV_remove+lower_triangle_lambda*meanC_remove
sca di lower_triangle_f
sca di lower_triangle_lambda

//Figure 2
//The first line shows the lower bound of the fixed costs.
//The second line shows the upper bound of the fixed costs.
//The third line shows the upper bound lambda from switches.
//The fourth line shows the lower bound lambda from switches.
twoway (function f = meanV_add+x*meanC_add, range(`=lambda_lower_switch' `=lambda_upper_switch') color(black) horizontal) ///
 (function f = meanV_remove+x*meanC_remove, range(`=lambda_lower_switch' `=lambda_upper_switch') color(black) horizontal ytitle("lambda") legend(off)) ///
 (scatteri `=lambda_upper_switch' `=lambda_upper_f_l' `=lambda_upper_switch' `=lambda_upper_f_u', color(black) recast(line)) ///
 (scatteri `=lambda_lower_switch' `=lambda_lower_f_l' `=lambda_lower_switch' `=lambda_lower_f_u', color(black) recast(line)) ///
 (scatteri `=lambda_lower_switch' `=meanF' `=lambda_upper_switch' `=meanF', recast(line) xtitle("fixed costs in 1000 Euro") ytitle("weight on consumer surplus {&lambda}")  xlabel(-100 "-100" 0 "0" 84 "f" 100 "100" 200 "200" 300 "300") legend(off)) 



//Lambda bounds based on district specific fixed costs
bysort arrondjud: egen F_district=mean(F) //Calculate district-specific fixed costs
preserve
keep if arrondjud==3 | arrondjud==7
su F
sca F_3_7=r(mean)
sca F_3_7_se=r(sd)/sqrt(r(N))
restore
replace F_district=F_3_7 if arrondjud==3 | arrondjud==7

global arr 1 2 3 4 5 6 8 9 10 11 12
mata: rseed(1234)
foreach g in $arr {
global arrond `g'
preserve
if ($arrond != 3){
drop if arrondjud!=$arrond
}
else {
keep if  arrondjud==3 |  arrondjud==7 //These 2 districts are managed together.
}
su C_add
sca meanC_add_`=$arrond'=r(mean)
su V_add
sca meanV_add_`=$arrond'=r(mean)
su F_district
sca meanF_district_`=$arrond'=r(mean)

gen lambda_upper_add_`=$arrond'=(meanF_district_`=$arrond'-meanV_add_`=$arrond')/meanC_add_`=$arrond'
su lambda_upper_add_`=$arrond'
sca lambda_upper_add_`=$arrond'=r(mean)
su C_remove
sca meanC_remove_`=$arrond'=r(mean)
su V_remove  
sca meanV_remove_`=$arrond'=r(mean)
gen lambda_lower_remove_`=$arrond'=(meanF_district_`=$arrond'-meanV_remove_`=$arrond')/meanC_remove_`=$arrond'
su lambda_lower_remove_`=$arrond'
sca lambda_lower_remove_`=$arrond'=r(mean)
gen b_id=_n //Make a bootstrap identifier
qui su b_id
sca nobs=r(max)
sca di nobs
sca B=1000 //Select the number of bootstrap draws
set matsize 5000 //This just allows us to store large matrices in stata
matrix b_lambda_remove = J(B,1,.) //This is a column vector in which we will store the mean lambda remove estimates
matrix b_lambda_add = J(B,1,.) //This is a column vector in which we will store the mean lambda add estimates

forvalues b= 1/1000 {
//Take random draws of notary office ids - this will be the sample for the bootstrap
if `b'> 1 {
drop randnum
}
gen randnum = runiformint(1,nobs) 

//Make empty matrices to store the values of profits and consumer surplus for each observation
matrix V_remove_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of V_remove
matrix C_remove_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of C_remove
matrix V_add_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of V_add
matrix C_add_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of C_add

//Go over the ids in the sample and store the relevant information
forvalues i= 1/`=nobs' {
  mat V_remove_mat[`i',1]=V_remove[randnum[`i']]
  mat C_remove_mat[`i',1]=C_remove[randnum[`i']]
  mat V_add_mat[`i',1]=V_add[randnum[`i']]
  mat C_add_mat[`i',1]=C_add[randnum[`i']]
}
//Calculate the ratio of sums for the draw and store it
mata : st_matrix("mean_V_remove", colsum(st_matrix("V_remove_mat")) :/ colnonmissing(st_matrix("V_remove_mat")))
mata : st_matrix("mean_C_remove", colsum(st_matrix("C_remove_mat")) :/ colnonmissing(st_matrix("C_remove_mat")))
mat b_lambda_remove[`b',1]=(meanF_district_`=$arrond'-mean_V_remove[1,1])/mean_C_remove[1,1]
mata : st_matrix("mean_V_add", colsum(st_matrix("V_add_mat")) :/ colnonmissing(st_matrix("V_add_mat")))
mata : st_matrix("mean_C_add", colsum(st_matrix("C_add_mat")) :/ colnonmissing(st_matrix("C_add_mat")))
mat b_lambda_add[`b',1]=(meanF_district_`=$arrond'-mean_V_add[1,1])/mean_C_add[1,1]
}

//Make a matrix with the upper and lower bounds of lambda
mat lambda_bootstrap_ar = b_lambda_remove,b_lambda_add
matrix colnames lambda_bootstrap_ar = b_lambda_remove b_lambda_add
//Export the matrix into a new dataset of bootstrap iteration means
clear
svmat lambda_bootstrap_ar, names(col)
di lambda_lower_remove_`=$arrond'
egen p2_5 = pctile(b_lambda_remove), p(2.5)
su p2_5
sca p2_5_`=$arrond'=r(mean)
egen p5 = pctile(b_lambda_remove), p(5)
su p5
sca p5_`=$arrond'=r(mean)
su b_lambda_remove, de
di lambda_upper_add_`=$arrond'
egen p97_5 = pctile(b_lambda_add), p(97.5)
su p97_5
sca p97_5_`=$arrond'=r(mean)
egen p95 = pctile(b_lambda_add), p(95)
su p95
sca p95_`=$arrond'=r(mean)
su b_lambda_add, de
mata: lambda_upper_add=st_numscalar("lambda_upper_add_`=$arrond'")
mata: lambda_lower_remove=st_numscalar("lambda_lower_remove_`=$arrond'")
mata: p2_5=st_numscalar("p2_5_`=$arrond'")
mata: p97_5=st_numscalar("p97_5_`=$arrond'")
mata: lambda_lower_remove
mata: lambda_upper_add
mata: p2_5
mata: p97_5
mata: bound_intervals_`=$arrond'=(lambda_lower_remove,lambda_upper_add,p2_5,p97_5)
mata: mata matsave "Estimates\Lambda_bounds\bound_intervals_`=$arrond'_Fd" bound_intervals_`=$arrond', replace
restore
}
//Collect the information from all bound calculations and report mean fixed costs by district
mata: all_bounds=J(12,6,.)
local districts "1 2 4 5 6 8 9 10 11 12"
foreach district of local districts {
global arrond `district'
display "$arrond"
mata: mata matuse "Estimates\Lambda_bounds\bound_intervals_`=$arrond'_Fd", replace
mata: all_bounds[`=$arrond',1..4]=bound_intervals_`=$arrond'
su F if arrondjud==`=$arrond'
sca F_`=$arrond'=r(mean)
sca F_`=$arrond'_se=r(sd)/sqrt(r(N))
mata: F_`=$arrond' = st_numscalar("F_`=$arrond'")
mata: F_`=$arrond'_se = st_numscalar("F_`=$arrond'_se")
mata: all_bounds[`=$arrond',5]=F_`=$arrond'
mata: all_bounds[`=$arrond',6]=F_`=$arrond'_se
}
mata: mata matuse "Estimates\Lambda_bounds\bound_intervals_3_Fd", replace
mata: all_bounds[3,1..4]=bound_intervals_3
mata: F_3 = st_numscalar("F_3_7")
mata: F_3_se = st_numscalar("F_3_7_se")
mata: all_bounds[3,5]=F_3
mata: all_bounds[3,6]=F_3_se
mata: all_bounds=round_bounds\all_bounds

mata: round_all_bounds=round(all_bounds,0.001)
mata: st_matrix("round_all_bounds", round_all_bounds)
matrix rownames round_all_bounds=All Antwerp Brussels LiegeinclEupen Hainaut Leuven Limburg Luik Luxembourg Namur EastFlanders WalloonBrabant WestFlanders	
*Table 9
mat list round_all_bounds
